# Discretion in Clinical Decision-Making: Evidence from Bunching
# Claire Boone
# Bunching estimation and robustness
# Creates tables A6, A7


# set up -----------------
  library(tidyverse)
  library(dplyr)
  library(magrittr)
  library(bunching)
  library(here)
  
  proj_dir <- here::here()
  raw_data <- file.path(proj_dir, "raw_data/")
  gen_data <- file.path(proj_dir, "gen_data/")
  out <- file.path(proj_dir, "out/")
    
# load data ---------------------------
  load(paste0(gen_data,"ehr_cleaned_analysis_sample.Rda"))   
  
# cleaning ----------------------------
  hd_all_visis <- hd
  # 1 obs per patient for bunching histograms
  hd %<>% dplyr::filter(rows_n==1) 
  
  hd$diag_htn1 <- ifelse(is.na(hd$diag_cv), NA, hd$diag_htn)
  hd %<>% dplyr::mutate(diag_htn10 = ifelse(is.na(diag_htn1), 0, diag_htn1))
    
  hd %<>% group_by(cod_est) %<>% arrange(fecha_atencion) %<>%
    dplyr::mutate(rows_clinic = row_number()) %<>% 
    dplyr::mutate(rows_max_clinic = max(rows_clinic)) %<>%
    ungroup()
  
  hist(hd[hd$rows_clinic==1,]$rows_max_clinic, breaks=100)
  summary(hd[hd$rows_clinic==1,]$rows_max_clinic)
  
  # keeping only the clinics with >= median N visits
  hd %<>% dplyr::filter(rows_max_clinic>=1265)
  length(unique(hd$cod_est))
  
  
# classify clinics into those with normal vs. excess rounding ------------------------ 

  # end digit:
  hd$last_digit <- hd$pa_sys %% 10
  
  hd$last_digit0 <- ifelse(hd$last_digit==0, 1, 0)
  hd$last_digit1 <- ifelse(hd$last_digit==1, 1, 0)
  hd$last_digit2 <- ifelse(hd$last_digit==2, 1, 0)
  hd$last_digit3 <- ifelse(hd$last_digit==3, 1, 0)
  hd$last_digit4 <- ifelse(hd$last_digit==4, 1, 0)
  hd$last_digit5 <- ifelse(hd$last_digit==5, 1, 0)
  hd$last_digit6 <- ifelse(hd$last_digit==6, 1, 0)
  hd$last_digit7 <- ifelse(hd$last_digit==7, 1, 0)
  hd$last_digit8 <- ifelse(hd$last_digit==8, 1, 0)
  hd$last_digit9 <- ifelse(hd$last_digit==9, 1, 0)
  
  hdc <- hd
  hdc %<>% group_by(cod_est) %<>%
    summarise(share0 = mean(last_digit0), share1 = mean(last_digit1), share2 = mean(last_digit2), 
              share3 = mean(last_digit3), share4 = mean(last_digit4), share5 = mean(last_digit5), 
              share6 = mean(last_digit6), share7 = mean(last_digit7), share8 = mean(last_digit8), 
              share9 = mean(last_digit9), visits = n())
  hdc %<>% dplyr::mutate(excess0 = share0 - 0.1,
                         excess1 = share1 - 0.1,
                         excess2 = share2 - 0.1,
                         excess3 = share3 - 0.1,
                         excess4 = share4 - 0.1,
                         excess5 = share5 - 0.1,
                         excess6 = share6 - 0.1,
                         excess7 = share7 - 0.1,
                         excess8 = share8 - 0.1,
                         excess9 = share9 - 0.1)
    
    # define low rounding as clinics with excess zeros <=20%, <=10%, <=5%
    hdc %<>% dplyr::mutate(low_rounding05 = ifelse(excess0<=0.05, 1, 0),
                           low_rounding10 = ifelse(excess0<=0.1, 1, 0),
                           low_rounding20 = ifelse(excess0<=0.2, 1, 0))
    
    # drop clinics that break bunching estimation loops:
    hdc %<>% dplyr::filter(cod_est!=104316 & cod_est!=107317 & cod_est!=114334 & cod_est!=109303 & cod_est!=115305)

    hdc %<>% arrange(cod_est) %<>% group_by(low_rounding05) %<>%
      dplyr::mutate(cid5 = row_number()) %<>% ungroup()
    hdc %<>% arrange(cod_est) %<>% group_by(low_rounding10) %<>%
      dplyr::mutate(cid10 = row_number()) %<>% ungroup()
    hdc %<>% arrange(cod_est) %<>% group_by(low_rounding20) %<>%
      dplyr::mutate(cid20 = row_number()) %<>% ungroup()
    
    hd <- dplyr::left_join(hd, hdc, by="cod_est") 
    
# bunching loop 1 --- low rounders -- 5% level    -------------
    hdl5 <- hd
    hdl5 %<>% dplyr::filter(low_rounding05==1)
    
    out_lowround_05 <- NA
    out_lowround_05 <- data.frame(matrix(nrow = 3, ncol = 8))
    colnames(out_lowround_05) <- c("cid5", "B_05", "B_sd_05", "b_05", "b_sd_05", "e_05", "mean_marg_05", "sd_marg_05")
    
    for (x in c(1:length(unique(hdl5$cod_est)))) {
    
    # bunching estimator
    bunch <- bunchit(z_vector = hdl5[hdl5$cid5==x,]$pa_sys, zstar = 140, binwidth = 5, bins_l = 8, bins_r = 8,
            poly = 9, t0 = 0, t1 = 1, notch=F, binv = "min",
            p_b = TRUE, seed = 69, rn = c(10),
            p_title = paste0("Notch (low rounding to zero clinics) cid5=", x),
            p_xtitle = "Systolic blood pressure", p_ytitle = "Visits",
            p_zstar_color = "mediumblue", p_cf_color = "cornflowerblue")
    
    
    # plot
    bp <- bunch$plot
    ggsave(bp, file = paste0(out, x, "_id_bunchplot_lowround_05_s1.pdf"), width = 5, height = 3)
    
    
    # save results
    out_lowround_05[x,1] <- x
    out_lowround_05[x, 2] <- bunch$B
    out_lowround_05[x, 3] <- bunch$B_sd
    out_lowround_05[x, 4] <- bunch$b
    out_lowround_05[x, 5] <- bunch$b_sd
    out_lowround_05[x, 6] <- bunch$e  
    out_lowround_05[x, 7] <- mean(bunch$marginal_buncher_vector)    
    out_lowround_05[x, 8] <- bunch$marginal_buncher_sd
  
    }
    
    write.csv(out_lowround_05, file = paste0(out, "out_lowround_05_s1.csv"))
    save(out_lowround_05, file = paste0(gen_data,"out_lowround_05_s1.Rda"))
    
# bunching loop 2 --- low rounders -- 10% level    -------------
  
    hdl10 <- hd
    hdl10 %<>% dplyr::filter(low_rounding10==1)
    
    out_lowround_10 <- NA
    out_lowround_10 <- data.frame(matrix(nrow = 3, ncol = 8))
    colnames(out_lowround_10) <- c("cid10", "B_10", "B_sd_10", "b_10", "b_sd_10", "e_10", "mean_marg_10", "sd_marg_10")
    
    for (x in c(1:length(unique(hdl10$cod_est)))) {
    
    # bunching estimator
    bunch <- bunchit(z_vector = hdl10[hdl10$cid10==x,]$pa_sys, zstar = 140, binwidth = 5, bins_l = 8, bins_r = 8,
                     poly = 9, t0 = 0, t1 = 1, notch=F, binv = "min",
                     p_b = TRUE, seed = 69, rn = c(10),
                       p_title = paste0("Notch (low rounding to zero clinics) cid10=", x),
                       p_xtitle = "Systolic blood pressure", p_ytitle = "Visits",
                       p_zstar_color = "mediumblue", p_cf_color = "cornflowerblue")
    
    
    
    # plot
    bp <- bunch$plot
    ggsave(bp, file = paste0(out, x, "_id_bunchplot_lowround_10_s2.pdf"), width = 5, height = 3)
    
    
    # save results
    out_lowround_10[x,1] <- x
    out_lowround_10[x, 2] <- bunch$B
    out_lowround_10[x, 3] <- bunch$B_sd
    out_lowround_10[x, 4] <- bunch$b
    out_lowround_10[x, 5] <- bunch$b_sd
    out_lowround_10[x, 6] <- bunch$e  
    out_lowround_10[x, 7] <- mean(bunch$marginal_buncher_vector)    
    out_lowround_10[x, 8] <- bunch$marginal_buncher_sd
  
    }
    
     write.csv(out_lowround_10, file = paste0(out, "out_lowround_10_s2.csv"))
     save(out_lowround_10, file = paste0(gen_data,"out_lowround_10_s2.Rda"))

# bunching loop 3 --- low rounders -- 20% level    -------------
  
    hdl20 <- hd
    hdl20 %<>% dplyr::filter(low_rounding20==1)
    
    out_lowround_20 <- NA
    out_lowround_20 <- data.frame(matrix(nrow = 3, ncol = 8))
    colnames(out_lowround_20) <- c("cid20", "B_20", "B_sd_20", "b_20", "b_sd_20", "e_20", "mean_marg_20", "sd_marg_20")
    
    for (x in c(1:length(unique(hdl20$cod_est)))) {
    
    # bunching estimator
    bunch <- bunchit(z_vector = hdl20[hdl20$cid20==x,]$pa_sys, zstar = 140, binwidth = 5, bins_l = 8, bins_r = 8,
                     poly = 9, t0 = 0, t1 = 1, notch=F, binv = "min",
                     p_b = TRUE, seed = 69, rn = c(10),
                       p_title = paste0("Notch (low rounding to zero clinics) cid20=", x),
                       p_xtitle = "Systolic blood pressure", p_ytitle = "Visits",
                       p_zstar_color = "mediumblue", p_cf_color = "cornflowerblue")
    
   
    
    # plot
    bp <- bunch$plot
    ggsave(bp, file = paste0(out, x, "_id_bunchplot_lowround_20_s3.pdf"), width = 5, height = 3)
    
    
    # save results
    out_lowround_20[x,1] <- x
    out_lowround_20[x, 2] <- bunch$B
    out_lowround_20[x, 3] <- bunch$B_sd
    out_lowround_20[x, 4] <- bunch$b
    out_lowround_20[x, 5] <- bunch$b_sd
    out_lowround_20[x, 6] <- bunch$e  
    out_lowround_20[x, 7] <- mean(bunch$marginal_buncher_vector)    
    out_lowround_20[x, 8] <- bunch$marginal_buncher_sd
  
    }
    
    write.csv(out_lowround_20, file = paste0(out, "out_lowround_20_s3.csv"))
    save(out_lowround_20, file = paste0(gen_data,"out_lowround_20_s3.Rda"))

    
     
# bunching loop 4 --- high rounders -- 5% level    -----------------
  
    hdh5 <- hd
    
    out_highround_05 <- NA
    out_highround_05 <- data.frame(matrix(nrow = 3, ncol = 8))
    colnames(out_highround_05) <- c("cid5", "B_05", "B_sd_05", "b_05", "b_sd_05", "e_05", "mean_marg_05", "sd_marg_05")
    
    for (x in c(1:length(unique(hdh5$cod_est)))) {
    
    # bunching estimator
    bunch <- bunchit(z_vector = hdh5[hdh5$cid5==x,]$pa_sys, zstar = 140, binwidth = 2, bins_l = 20, bins_r = 20,
                     poly = 9, t0 = 0, t1 = 1, notch=F, binv = "min",
                     p_b = TRUE, seed = 69, rn = c(10), 
                       p_title = paste0("Notch (high rounding to zero clinics) cid5=", x),
                       p_xtitle = "Systolic blood pressure", p_ytitle = "Visits",
                       p_zstar_color = "mediumblue", p_cf_color = "cornflowerblue")
    
    

    # plot
    bp <- bunch$plot
    ggsave(bp, file = paste0(out, x, "_id_bunchplot_highround_05_s4.pdf"), width = 5, height = 3)
    
    
    # save results
    out_highround_05[x,1] <- x
    out_highround_05[x, 2] <- bunch$B
    out_highround_05[x, 3] <- bunch$B_sd
    out_highround_05[x, 4] <- bunch$b
    out_highround_05[x, 5] <- bunch$b_sd
    out_highround_05[x, 6] <- bunch$e  
    out_highround_05[x, 7] <- mean(bunch$marginal_buncher_vector)    
    out_highround_05[x, 8] <- bunch$marginal_buncher_sd
  
    }   
    
    write.csv(out_highround_05, file = paste0(out, "out_highround_05_s4.csv"))
    save(out_highround_05, file = paste0(gen_data,"out_highround_05_s4.Rda"))
    
    
        
     
# bunching loop 5 --- high rounders -- 10% level    -----------------
  
    hdh10 <- hd
    hdh10 %<>% dplyr::filter(low_rounding10==0)
    
    out_highround_10 <- NA
    out_highround_10 <- data.frame(matrix(nrow = 3, ncol = 8))
    colnames(out_highround_10) <- c("cid10", "B_10", "B_sd_10", "b_10", "b_sd_10", "e_10", "mean_marg_10", "sd_marg_10")
    
    for (x in c(1:length(unique(hdh10$cod_est)))) {
    
    # bunching estimator
    bunch <- bunchit(z_vector = hdh10[hdh10$cid10==x,]$pa_sys, zstar = 140, binwidth = 2, bins_l = 20, bins_r = 20,
                     poly = 9, t0 = 0, t1 = 1, notch=F, binv = "min",
                     p_b = TRUE, seed = 69, rn = c(10), 
                       p_title = paste0("Notch (high rounding to zero clinics) cid10=", x),
                       p_xtitle = "Systolic blood pressure", p_ytitle = "Visits",
                       p_zstar_color = "mediumblue", p_cf_color = "cornflowerblue")
    
    
    # plot
    bp <- bunch$plot
    ggsave(bp, file = paste0(out, x, "_id_bunchplot_highround_10_s5.pdf"), width = 5, height = 3)
    
    
    # save results
    out_highround_10[x,1] <- x
    out_highround_10[x, 2] <- bunch$B
    out_highround_10[x, 3] <- bunch$B_sd
    out_highround_10[x, 4] <- bunch$b
    out_highround_10[x, 5] <- bunch$b_sd
    out_highround_10[x, 6] <- bunch$e  
    out_highround_10[x, 7] <- mean(bunch$marginal_buncher_vector)    
    out_highround_10[x, 8] <- bunch$marginal_buncher_sd
  
    }   
    
    write.csv(out_highround_10, file = paste0(out, "out_highround10_s5.csv"))
    save(out_highround_10, file = paste0(gen_data,"out_highround_10_s5.Rda"))
            
# bunching loop 6 --- high rounders -- 20% level    -----------------
  
    hdh20 <- hd
    hdh20 %<>% dplyr::filter(low_rounding20==0)
    
    out_highround_20 <- NA
    out_highround_20 <- data.frame(matrix(nrow = 3, ncol = 8))
    colnames(out_highround_20) <- c("cid20", "B_20", "B_sd_20", "b_20", "b_sd_20", "e_20", "mean_marg_20", "sd_marg_20")
    
    for (x in c(1:length(unique(hdh20$cod_est)))) {
    
    # bunching estimator
    bunch <- bunchit(z_vector = hdh20[hdh20$cid20==x,]$pa_sys, zstar = 140, binwidth = 2, bins_l = 20, bins_r = 20,
                     poly = 9, t0 = 0, t1 = 1, notch=F, binv = "min",
                     p_b = TRUE, seed = 69, rn = c(10), 
                       p_title = paste0("Notch (high rounding to zero clinics) cid20=", x),
                       p_xtitle = "Systolic blood pressure", p_ytitle = "Visits",
                       p_zstar_color = "mediumblue", p_cf_color = "cornflowerblue")
    
    
    # plot
    bp <- bunch$plot
    ggsave(bp, file = paste0(out, x, "_id_bunchplot_highround_20_s6.pdf"), width = 5, height = 3)
    
    
    # save results
    out_highround_20[x,1] <- x
    out_highround_20[x, 2] <- bunch$B
    out_highround_20[x, 3] <- bunch$B_sd
    out_highround_20[x, 4] <- bunch$b
    out_highround_20[x, 5] <- bunch$b_sd
    out_highround_20[x, 6] <- bunch$e  
    out_highround_20[x, 7] <- mean(bunch$marginal_buncher_vector)    
    out_highround_20[x, 8] <- bunch$marginal_buncher_sd
  
    }   
    
    write.csv(out_highround_20, file = paste0(out, "out_highround20_s6.csv"))
    save(out_highround_20, file = paste0(gen_data,"out_highround_20_s6.Rda"))
    
    
#--- Tables A6, A7--------
# specification 1 -----    
    s1 <- read.csv(paste0(out, "out_lowround_05_s1.csv"))
    
    s1 %<>% dplyr::mutate(tstat = b_05/b_sd_05,
                                      tstat_abs = abs(tstat),
                                      sig05 =  ifelse(tstat_abs>=1.96, 1, 0),
                                      negative_bunch = ifelse(b_05<0, 1, 0),
                                      negative_sig_bunch = ifelse(b_05<0 & sig05==1, 1, 0),
                                      b_05_neg_sig = ifelse(negative_sig_bunch==1, b_05, NA))
    
    s1 %<>% dplyr::summarise(
      n_clinincs = n(),
      n_neg = sum(negative_bunch),
      n_neg_sig = sum(negative_sig_bunch),
      mean_b_05_neg_sig = mean(b_05_neg_sig, na.rm=T),
      median_b_05_neg_sig = median(b_05_neg_sig, na.rm=T),
      min_b_05_neg_sig = min(b_05_neg_sig, na.rm=T),
      max_b_05_neg_sig = max(b_05_neg_sig, na.rm=T)
    )
    write.csv(s1, file =paste0(out, "s1.csv"))    
    
   
    
    
    
# specification 2 -----    
    s3 <- read.csv(paste0(out, "out_lowround_20_s3.csv"))
    
    s3 %<>% dplyr::mutate(tstat = b_20/b_sd_20,
                          tstat_abs = abs(tstat),
                          sig20 =  ifelse(tstat_abs>=1.96, 1, 0),
                          negative_bunch = ifelse(b_20<0, 1, 0),
                          negative_sig_bunch = ifelse(b_20<0 & sig20==1, 1, 0),
                          b_20_neg_sig = ifelse(negative_sig_bunch==1, b_20, NA))
    
    s3 %<>% dplyr::summarise(
      n_clinincs = n(),
      n_neg = sum(negative_bunch),
      n_neg_sig = sum(negative_sig_bunch),
      mean_b_20_neg_sig = mean(b_20_neg_sig, na.rm=T),
      median_b_20_neg_sig = median(b_20_neg_sig, na.rm=T),
      min_b_20_neg_sig = min(b_20_neg_sig, na.rm=T),
      max_b_20_neg_sig = max(b_20_neg_sig, na.rm=T)
    )
    write.csv(s3, file =paste0(out, "s3.csv"))    
    
    
    
    
    
# specification 3 -----    
    s4 <- read.csv(paste0(out, "out_highround_05_s4.csv"))
    
    s4 %<>% dplyr::mutate(tstat = b_05/b_sd_05,
                          tstat_abs = abs(tstat),
                          sig05 =  ifelse(tstat_abs>=1.96, 1, 0),
                          negative_bunch = ifelse(b_05<0, 1, 0),
                          negative_sig_bunch = ifelse(b_05<0 & sig05==1, 1, 0),
                          b_05_neg_sig = ifelse(negative_sig_bunch==1, b_05, NA))
    
    s4 %<>% dplyr::summarise(
      n_clinincs = n(),
      n_neg = sum(negative_bunch),
      n_neg_sig = sum(negative_sig_bunch),
      mean_b_05_neg_sig = mean(b_05_neg_sig, na.rm=T),
      median_b_05_neg_sig = median(b_05_neg_sig, na.rm=T),
      min_b_05_neg_sig = min(b_05_neg_sig, na.rm=T),
      max_b_05_neg_sig = max(b_05_neg_sig, na.rm=T)
    )
    write.csv(s4, file =paste0(out, "s4.csv"))    
    
    
    
    
    
# specification 4 -----    
    s6 <- read.csv(paste0(out, "out_highround20_s6.csv"))
    
    s6 %<>% dplyr::mutate(tstat = b_20/b_sd_20,
                          tstat_abs = abs(tstat),
                          sig20 =  ifelse(tstat_abs>=1.96, 1, 0),
                          negative_bunch = ifelse(b_20<0, 1, 0),
                          negative_sig_bunch = ifelse(b_20<0 & sig20==1, 1, 0),
                          b_20_neg_sig = ifelse(negative_sig_bunch==1, b_20, NA))
    
    s6 %<>% dplyr::summarise(
      n_clinincs = n(),
      n_neg = sum(negative_bunch),
      n_neg_sig = sum(negative_sig_bunch),
      mean_b_20_neg_sig = mean(b_20_neg_sig, na.rm=T),
      median_b_20_neg_sig = median(b_20_neg_sig, na.rm=T),
      min_b_20_neg_sig = min(b_20_neg_sig, na.rm=T),
      max_b_20_neg_sig = max(b_20_neg_sig, na.rm=T)
    )
    write.csv(s6, file =paste0(out, "s6.csv"))    
    
    
    
    
    
# specification 5 -----    
    s8 <- read.csv(paste0(out, "out_lowround_10_s8.csv"))
    
    s8 %<>% dplyr::mutate(tstat = b_10/b_sd_10,
                          tstat_abs = abs(tstat),
                          sig10 =  ifelse(tstat_abs>=1.96, 1, 0),
                          negative_bunch = ifelse(b_10<0, 1, 0),
                          negative_sig_bunch = ifelse(b_10<0 & sig10==1, 1, 0),
                          b_10_neg_sig = ifelse(negative_sig_bunch==1, b_10, NA))
    
    s8 %<>% dplyr::summarise(
      n_clinincs = n(),
      n_neg = sum(negative_bunch),
      n_neg_sig = sum(negative_sig_bunch),
      mean_b_10_neg_sig = mean(b_10_neg_sig, na.rm=T),
      median_b_10_neg_sig = median(b_10_neg_sig, na.rm=T),
      min_b_10_neg_sig = min(b_10_neg_sig, na.rm=T),
      max_b_10_neg_sig = max(b_10_neg_sig, na.rm=T)
    )
    write.csv(s8, file =paste0(out, "s8.csv"))    
    
    
    
# specification 6 -----    
    s10 <- read.csv(paste0(out, "out_highround10_s10.csv"))
    
    s10 %<>% dplyr::mutate(tstat = b_10/b_sd_10,
                          tstat_abs = abs(tstat),
                          sig10 =  ifelse(tstat_abs>=1.96, 1, 0),
                          negative_bunch = ifelse(b_10<0, 1, 0),
                          negative_sig_bunch = ifelse(b_10<0 & sig10==1, 1, 0),
                          b_10_neg_sig = ifelse(negative_sig_bunch==1, b_10, NA))
    
    s10 %<>% dplyr::summarise(
      n_clinincs = n(),
      n_neg = sum(negative_bunch),
      n_neg_sig = sum(negative_sig_bunch),
      mean_b_10_neg_sig = mean(b_10_neg_sig, na.rm=T),
      median_b_10_neg_sig = median(b_10_neg_sig, na.rm=T),
      min_b_10_neg_sig = min(b_10_neg_sig, na.rm=T),
      max_b_10_neg_sig = max(b_10_neg_sig, na.rm=T)
    )
    write.csv(s10, file =paste0(out, "s10.csv"))      
    
  
# specification 7 -----    
    s14 <- read.csv(paste0(out, "out_lowround_10_s14.csv"))
    
    s14 %<>% dplyr::mutate(tstat = b_10/b_sd_10,
                          tstat_abs = abs(tstat),
                          sig10 =  ifelse(tstat_abs>=1.96, 1, 0),
                          negative_bunch = ifelse(b_10<0, 1, 0),
                          negative_sig_bunch = ifelse(b_10<0 & sig10==1, 1, 0),
                          b_10_neg_sig = ifelse(negative_sig_bunch==1, b_10, NA))
    
    s14 %<>% dplyr::summarise(
      n_clinincs = n(),
      n_neg = sum(negative_bunch),
      n_neg_sig = sum(negative_sig_bunch),
      mean_b_10_neg_sig = mean(b_10_neg_sig, na.rm=T),
      median_b_10_neg_sig = median(b_10_neg_sig, na.rm=T),
      min_b_10_neg_sig = min(b_10_neg_sig, na.rm=T),
      max_b_10_neg_sig = max(b_10_neg_sig, na.rm=T)
    )
    write.csv(s14, file =paste0(out, "s14.csv"))    
    
    
    
# specification 8 -----    
    s16 <- read.csv(paste0(out, "out_highround10_s16.csv"))
    
    s16 %<>% dplyr::mutate(tstat = b_10/b_sd_10,
                          tstat_abs = abs(tstat),
                          sig10 =  ifelse(tstat_abs>=1.96, 1, 0),
                          negative_bunch = ifelse(b_10<0, 1, 0),
                          negative_sig_bunch = ifelse(b_10<0 & sig10==1, 1, 0),
                          b_10_neg_sig = ifelse(negative_sig_bunch==1, b_10, NA))
    
    s16 %<>% dplyr::summarise(
      n_clinincs = n(),
      n_neg = sum(negative_bunch),
      n_neg_sig = sum(negative_sig_bunch),
      mean_b_10_neg_sig = mean(b_10_neg_sig, na.rm=T),
      median_b_10_neg_sig = median(b_10_neg_sig, na.rm=T),
      min_b_10_neg_sig = min(b_10_neg_sig, na.rm=T),
      max_b_10_neg_sig = max(b_10_neg_sig, na.rm=T)
    )
    write.csv(s16, file =paste0(out, "s16.csv")) 
# specification 9 -----    
    s19 <- read.csv(paste0(out, "out_lowround_10_s19.csv"))
    
    s19 %<>% dplyr::mutate(tstat = b_10/b_sd_10,
                          tstat_abs = abs(tstat),
                          sig10 =  ifelse(tstat_abs>=1.96, 1, 0),
                          negative_bunch = ifelse(b_10<0, 1, 0),
                          negative_sig_bunch = ifelse(b_10<0 & sig10==1, 1, 0),
                          b_10_neg_sig = ifelse(negative_sig_bunch==1, b_10, NA))
    
    s19 %<>% dplyr::summarise(
      n_clinincs = n(),
      n_neg = sum(negative_bunch),
      n_neg_sig = sum(negative_sig_bunch),
      mean_b_10_neg_sig = mean(b_10_neg_sig, na.rm=T),
      median_b_10_neg_sig = median(b_10_neg_sig, na.rm=T),
      min_b_10_neg_sig = min(b_10_neg_sig, na.rm=T),
      max_b_10_neg_sig = max(b_10_neg_sig, na.rm=T)
    )
    write.csv(s19, file =paste0(out, "s19.csv"))    
    
    
# specification 10 -----    
    s21 <- read.csv(paste0(out, "out_lowround_10_s21.csv"))
    
    s21 %<>% dplyr::mutate(tstat = b_10/b_sd_10,
                          tstat_abs = abs(tstat),
                          sig10 =  ifelse(tstat_abs>=1.96, 1, 0),
                          negative_bunch = ifelse(b_10<0, 1, 0),
                          negative_sig_bunch = ifelse(b_10<0 & sig10==1, 1, 0),
                          b_10_neg_sig = ifelse(negative_sig_bunch==1, b_10, NA))
    
    s21 %<>% dplyr::summarise(
      n_clinincs = n(),
      n_neg = sum(negative_bunch),
      n_neg_sig = sum(negative_sig_bunch),
      mean_b_10_neg_sig = mean(b_10_neg_sig, na.rm=T),
      median_b_10_neg_sig = median(b_10_neg_sig, na.rm=T),
      min_b_10_neg_sig = min(b_10_neg_sig, na.rm=T),
      max_b_10_neg_sig = max(b_10_neg_sig, na.rm=T)
    )
    write.csv(s21, file =paste0(out, "s21.csv"))    
    
    
    
    
# specification 11-----    
    s22 <- read.csv(paste0(out, "out_highround10_s22.csv"))
    
    s22 %<>% dplyr::mutate(tstat = b_10/b_sd_10,
                          tstat_abs = abs(tstat),
                          sig10 =  ifelse(tstat_abs>=1.96, 1, 0),
                          negative_bunch = ifelse(b_10<0, 1, 0),
                          negative_sig_bunch = ifelse(b_10<0 & sig10==1, 1, 0),
                          b_10_neg_sig = ifelse(negative_sig_bunch==1, b_10, NA))
    
    s22 %<>% dplyr::summarise(
      n_clinincs = n(),
      n_neg = sum(negative_bunch),
      n_neg_sig = sum(negative_sig_bunch),
      mean_b_10_neg_sig = mean(b_10_neg_sig, na.rm=T),
      median_b_10_neg_sig = median(b_10_neg_sig, na.rm=T),
      min_b_10_neg_sig = min(b_10_neg_sig, na.rm=T),
      max_b_10_neg_sig = max(b_10_neg_sig, na.rm=T)
    )
    write.csv(s22, file =paste0(out, "s22.csv")) 
    
# specification 12-----    
    s24 <- read.csv(paste0(out, "out_highround10_s24.csv"))
    
    s24 %<>% dplyr::mutate(tstat = b_10/b_sd_10,
                          tstat_abs = abs(tstat),
                          sig10 =  ifelse(tstat_abs>=1.96, 1, 0),
                          negative_bunch = ifelse(b_10<0, 1, 0),
                          negative_sig_bunch = ifelse(b_10<0 & sig10==1, 1, 0),
                          b_10_neg_sig = ifelse(negative_sig_bunch==1, b_10, NA))
    
    s24 %<>% dplyr::summarise(
      n_clinincs = n(),
      n_neg = sum(negative_bunch),
      n_neg_sig = sum(negative_sig_bunch),
      mean_b_10_neg_sig = mean(b_10_neg_sig, na.rm=T),
      median_b_10_neg_sig = median(b_10_neg_sig, na.rm=T),
      min_b_10_neg_sig = min(b_10_neg_sig, na.rm=T),
      max_b_10_neg_sig = max(b_10_neg_sig, na.rm=T)
    )
    write.csv(s24, file =paste0(out, "s24.csv"))   
    
# specification 13-----    
    s25 <- read.csv(paste0(out, "out_lowround_10_s25.csv"))
    
    s25 %<>% dplyr::mutate(tstat = b_10/b_sd_10,
                          tstat_abs = abs(tstat),
                          sig10 =  ifelse(tstat_abs>=1.96, 1, 0),
                          negative_bunch = ifelse(b_10<0, 1, 0),
                          negative_sig_bunch = ifelse(b_10<0 & sig10==1, 1, 0),
                          b_10_neg_sig = ifelse(negative_sig_bunch==1, b_10, NA))
    
    s25 %<>% dplyr::summarise(
      n_clinincs = n(),
      n_neg = sum(negative_bunch),
      n_neg_sig = sum(negative_sig_bunch),
      mean_b_10_neg_sig = mean(b_10_neg_sig, na.rm=T),
      median_b_10_neg_sig = median(b_10_neg_sig, na.rm=T),
      min_b_10_neg_sig = min(b_10_neg_sig, na.rm=T),
      max_b_10_neg_sig = max(b_10_neg_sig, na.rm=T)
    )
    write.csv(s25, file =paste0(out, "s25.csv"))  
    
# specification 14-----    
    s26 <- read.csv(paste0(out, "out_highround10_s26.csv"))
    
    s26 %<>% dplyr::mutate(tstat = b_10/b_sd_10,
                          tstat_abs = abs(tstat),
                          sig10 =  ifelse(tstat_abs>=1.96, 1, 0),
                          negative_bunch = ifelse(b_10<0, 1, 0),
                          negative_sig_bunch = ifelse(b_10<0 & sig10==1, 1, 0),
                          b_10_neg_sig = ifelse(negative_sig_bunch==1, b_10, NA))
    
    s26 %<>% dplyr::summarise(
      n_clinincs = n(),
      n_neg = sum(negative_bunch),
      n_neg_sig = sum(negative_sig_bunch),
      mean_b_10_neg_sig = mean(b_10_neg_sig, na.rm=T),
      median_b_10_neg_sig = median(b_10_neg_sig, na.rm=T),
      min_b_10_neg_sig = min(b_10_neg_sig, na.rm=T),
      max_b_10_neg_sig = max(b_10_neg_sig, na.rm=T)
    )
    write.csv(s26, file =paste0(out, "s26.csv"))     
    
## load bunching data -------------------------------------
    load(paste0(gen_data,"out_lowround_05_sept.Rda"))  
    load(paste0(gen_data,"out_lowround_10_sept.Rda"))  
    load(paste0(gen_data,"out_lowround_20_sept.Rda")) 
    
    load(paste0(gen_data,"out_highround_05_sept.Rda"))  
    load(paste0(gen_data,"out_highround_10_sept.Rda"))  
    load(paste0(gen_data,"out_highround_20_sept.Rda")) 
#----------------------------------------------------------#    
# combine all rda files and get cod_est --------------------------
     load(paste0(gen_data,"out_lowround_05_sept.Rda"))  
     load(paste0(gen_data,"out_lowround_10_sept.Rda"))  
     load(paste0(gen_data,"out_lowround_20_sept.Rda")) 
     
     out_lowround_05$low_rounding05 <- NA
     out_lowround_05$low_rounding05 <- 1
     
     out_lowround_10$low_rounding10 <- NA
     out_lowround_10$low_rounding10 <- 1
     
     out_lowround_20$low_rounding20 <- NA
     out_lowround_20$low_rounding20 <- 1
     
     load(paste0(gen_data,"out_highround_05_sept.Rda"))  
     load(paste0(gen_data,"out_highround_10_sept.Rda"))  
     load(paste0(gen_data,"out_highround_20_sept.Rda")) 

     out_highround_05$low_rounding05 <- NA
     out_highround_05$low_rounding05 <- 0

     out_highround_10$low_rounding10 <- NA
     out_highround_10$low_rounding10 <- 0

     out_highround_20$low_rounding20 <- NA
     out_highround_20$low_rounding20 <- 0
     
     five  <- rbind(out_highround_05, out_lowround_05)
     ten  <- rbind(out_highround_10, out_lowround_10)
     twenty  <- rbind(out_highround_20, out_lowround_20)
     
     # join with clinic-level dataset
     hdc <- dplyr::left_join(hdc, five, by = c("cid5", "low_rounding05")) 
     hdc <- dplyr::left_join(hdc, ten, by = c("cid10", "low_rounding10")) 
     hdc <- dplyr::left_join(hdc, twenty, by = c("cid20", "low_rounding20")) 
     
     # join with visit level dataset
     hdc <- hdc[ -c(2:28) ]
     hd <- dplyr::left_join(hd, hdc, by=c("cod_est"))
     
    
#---- save ------    
      save(hdc, file = paste0(gen_data,"clinic_bunching.Rda"))
      save(hd, file = paste0(gen_data,"visits_bunching.Rda"))
   
